library(ggplot2)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plyr)
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
library(vegan)
## Warning: package 'vegan' was built under R version 3.4.2
## Loading required package: permute
## Warning: package 'permute' was built under R version 3.4.2
## Loading required package: lattice
## This is vegan 2.4-4
library(reshape2)
library(VennDiagram)
## Warning: package 'VennDiagram' was built under R version 3.4.2
## Loading required package: grid
## Loading required package: futile.logger
## Warning: package 'futile.logger' was built under R version 3.4.2
library(knitr)
## Warning: package 'knitr' was built under R version 3.4.3

Replication of figures 1, 3, and 4 (and Table 1) of “Gene expression patterns associated with caste and reproductive status in ants: worker-specific genes are more derived than queen-specific ones” by Feldmeyer, Elsner, and Foitzik, 2013

This paper involved sequencing total RNA from four different phenotypes of female ants that occur in colonies of Temnothorax longispinosus

This is interesting because ant colonies are composed of one or more queens and their numerous worker offspring, who are usually full-, or sometimes half-sibs in terms of parentage but who actually share 75% as opposed to 50% of their DNA with full-sibs due to haplodiploidy in ants. Nonetheless, these nearly identical genomes produce extremely different looking and behaving phenotypes, presumably based on patterns of gene expression.

I downloaded the gene expression spreadsheet they published and converted to csv. To do this had to combine two different sheets, one with just the expression values, and one with the p and fold change values, within the overall excel file so I could just use the one csv for all analyses.

a <- read.csv("mec12490-sup-0005-TableS3.csv", header = TRUE, sep = ",", stringsAsFactors = FALSE)

head(a)
##             Feature.ID  Annotations...RefSeq.protein.ID
## 1     TlongiContigs_c1 gi|327281562|ref|XP_003225516.1|
## 2    TlongiContigs_c10      gi|322794755|gb|EFZ17702.1|
## 3   TlongiContigs_c100      gi|307210970|gb|EFN87271.1|
## 4  TlongiContigs_c1000      gi|332019357|gb|EGI59858.1|
## 5 TlongiContigs_c10004                                 
## 6  TlongiContigs_c1001      gi|332025295|gb|EGI65466.1|
##                                        Annotations...RefSeq
## 1              PREDICTED: hypothetical protein LOC100561123
## 2                           hypothetical protein SINV_02084
## 3 Major facilitator superfamily domain-containing protein 8
## 4                            hypothetical protein G5I_11953
## 5                                                          
## 6              Cell division control protein 6-like protein
##   queen...Q.RNA.Seq...Expression.values queen...Q.RNA.Seq...Gene.length
## 1                             1.4214700                            4144
## 2                             2.7708168                            2899
## 3                            11.1631735                            4931
## 4                             1.2578753                            1153
## 5                             0.7968847                            1232
## 6                             8.1262000                            2441
##   queen...Q.RNA.Seq...Unique.gene.reads
## 1                                   264
## 2                                   360
## 3                                  1390
## 4                                    65
## 5                                    18
## 6                                   593
##   queen...Q.RNA.Seq...Total.gene.reads queen...Q.RNA.Seq...RPKM
## 1                                  264                1.4214700
## 2                                  360                2.7708168
## 3                                 2467               11.1631735
## 4                                   65                1.2578753
## 5                                   44                0.7968847
## 6                                  889                8.1262000
##   fertile...FB1.RNA.Seq...Expression.values
## 1                                 1.5229346
## 2                                 4.4192524
## 3                                 5.8170108
## 4                                 0.8210374
## 5                                 5.7116968
## 6                                 1.9261472
##   fertile...FB1.RNA.Seq...Gene.length
## 1                                4144
## 2                                2899
## 3                                4931
## 4                                1153
## 5                                1232
## 6                                2441
##   fertile...FB1.RNA.Seq...Unique.gene.reads
## 1                                       200
## 2                                       406
## 3                                       563
## 4                                        30
## 5                                        77
## 6                                        93
##   fertile...FB1.RNA.Seq...Total.gene.reads fertile...FB1.RNA.Seq...RPKM
## 1                                      200                    1.5229346
## 2                                      406                    4.4192524
## 3                                      909                    5.8170108
## 4                                       30                    0.8210374
## 5                                      223                    5.7116968
## 6                                      149                    1.9261472
##   infertile...IFB1.RNA.Seq...Expression.values
## 1                                    1.7337514
## 2                                    2.7698933
## 3                                   10.4028395
## 4                                    0.3665459
## 5                                    3.7305788
## 6                                    1.4933066
##   infertile...IFB1.RNA.Seq...Gene.length
## 1                                   4144
## 2                                   2899
## 3                                   4931
## 4                                   1153
## 5                                   1232
## 6                                   2441
##   infertile...IFB1.RNA.Seq...Unique.gene.reads
## 1                                          136
## 2                                          152
## 3                                          552
## 4                                            8
## 5                                           36
## 6                                           32
##   infertile...IFB1.RNA.Seq...Total.gene.reads
## 1                                         136
## 2                                         152
## 3                                         971
## 4                                           8
## 5                                          87
## 6                                          69
##   infertile...IFB1.RNA.Seq...RPKM forager...W2.RNA.Seq...Expression.values
## 1                       1.7337514                                0.8068826
## 2                       2.7698933                                3.5270793
## 3                      10.4028395                                8.5499827
## 4                       0.3665459                                0.3432389
## 5                       3.7305788                                3.9924209
## 6                       1.4933066                                1.6146626
##   forager...W2.RNA.Seq...Gene.length
## 1                               4144
## 2                               2899
## 3                               4931
## 4                               1153
## 5                               1232
## 6                               2441
##   forager...W2.RNA.Seq...Unique.gene.reads
## 1                                      414
## 2                                     1266
## 3                                     3758
## 4                                       49
## 5                                      249
## 6                                      288
##   forager...W2.RNA.Seq...Total.gene.reads forager...W2.RNA.Seq...RPKM
## 1                                     414                   0.8068826
## 2                                    1266                   3.5270793
## 3                                    5220                   8.5499827
## 4                                      49                   0.3432389
## 5                                     609                   3.9924209
## 6                                     488                   1.6146626
##   Experiment...Range..original.values. Experiment...IQR..original.values.
## 1                            0.9268689                          0.3122814
## 2                            1.6493591                          1.6484356
## 3                            5.3461626                          2.6131907
## 4                            0.9146364                          0.8913294
## 5                            4.9148121                          1.9811180
## 6                            6.6328934                          6.5115374
##   Experiment...Difference..original.values.
## 1                                -0.9268689
## 2                                -1.6493591
## 3                                -5.3461626
## 4                                -0.9146364
## 5                                 4.9148121
## 6                                -6.6328934
##   Experiment...Fold.Change..original.values.
## 1                                  -2.148704
## 2                                  -1.595459
## 3                                  -1.919057
## 4                                  -3.664722
## 5                                   7.167532
## 6                                  -5.441749
##   Kal.s.Z.test..queen.vs.fertile.original.values...Proportions.difference
## 1                                                            -7.78920e-08
## 2                                                             2.59240e-06
## 3                                                            -1.22325e-05
## 4                                                            -1.06515e-06
## 5                                                             9.14617e-06
## 6                                                            -1.32713e-05
##   Kal.s.Z.test..queen.vs.fertile.original.values...Proportions.fold.change
## 1                                                                -1.027037
## 2                                                                 1.449477
## 3                                                                -2.111628
## 4                                                                -1.685793
## 5                                                                 6.513885
## 6                                                                -4.642240
##   Kal.s.Z.test..queen.vs.fertile.original.values...Test.statistic
## 1                                                     -0.02287566
## 2                                                      0.48720921
## 3                                                     -1.49598382
## 4                                                     -0.37228314
## 5                                                      1.80665915
## 6                                                     -2.10941321
##   Kal.s.Z.test..queen.vs.fertile.original.values...P.value
## 1                                               0.98174946
## 2                                               0.62611008
## 3                                               0.13465787
## 4                                               0.70968205
## 5                                               0.07081544
## 6                                               0.03490893
##   Kal.s.Z.test..queen.vs.fertile.original.values...FDR.p.value.correction
## 1                                                               1.0000000
## 2                                                               0.9954923
## 3                                                               0.6309522
## 4                                                               0.9970280
## 5                                                               0.4362982
## 6                                                               0.2774690
##   Kal.s.Z.test..queen.vs.infertile.original.values...Proportions.difference
## 1                                                               2.37611e-07
## 2                                                              -6.60812e-07
## 3                                                              -4.05725e-06
## 4                                                              -1.94254e-06
## 5                                                               5.21921e-06
## 6                                                              -1.41619e-05
##   Kal.s.Z.test..queen.vs.infertile.original.values...Proportions.fold.change
## 1                                                                   1.080305
## 2                                                                  -1.129399
## 3                                                                  -1.211542
## 4                                                                  -3.874467
## 5                                                                   4.146465
## 6                                                                  -6.143859
##   Kal.s.Z.test..queen.vs.infertile.original.values...Test.statistic
## 1                                                        0.06828384
## 2                                                       -0.14330511
## 3                                                       -0.44598146
## 4                                                       -0.77801022
## 5                                                        1.25211222
## 6                                                       -2.33083920
##   Kal.s.Z.test..queen.vs.infertile.original.values...P.value
## 1                                                 0.94555969
## 2                                                 0.88604922
## 3                                                 0.65561064
## 4                                                 0.43656299
## 5                                                 0.21052897
## 6                                                 0.01976184
##   Kal.s.Z.test..queen.vs.infertile.original.values...FDR.p.value.correction
## 1                                                                 1.0000000
## 2                                                                 1.0000000
## 3                                                                 0.9963725
## 4                                                                 0.9621052
## 5                                                                 0.7843877
## 6                                                                 0.1811111
##   Kal.s.Z.test..queen.vs.forager.original.values...Proportions.difference
## 1                                                            -1.41195e-06
## 2                                                             9.94316e-07
## 3                                                            -6.84516e-06
## 4                                                            -1.96029e-06
## 5                                                             5.99527e-06
## 6                                                            -1.38195e-05
##   Kal.s.Z.test..queen.vs.forager.original.values...Proportions.fold.change
## 1                                                                -1.912758
## 2                                                                 1.172397
## 3                                                                -1.417604
## 4                                                                -3.978998
## 5                                                                 4.614325
## 6                                                                -5.464348
##   Kal.s.Z.test..queen.vs.forager.original.values...Test.statistic
## 1                                                      -0.4734871
## 2                                                       0.1983390
## 3                                                      -0.7717740
## 4                                                      -0.7755138
## 5                                                       1.3713674
## 6                                                      -2.2165468
##   Kal.s.Z.test..queen.vs.forager.original.values...P.value
## 1                                               0.63586573
## 2                                               0.84277982
## 3                                               0.44024827
## 4                                               0.43803609
## 5                                               0.17026046
## 6                                               0.02665408
##   Kal.s.Z.test..queen.vs.forager.original.values...FDR.p.value.correction
## 1                                                               1.0000000
## 2                                                               1.0000000
## 3                                                               1.0000000
## 4                                                               1.0000000
## 5                                                               0.7578655
## 6                                                               0.2376383
##   Kal.s.Z.test..fertile.vs.infertile.original.values...Proportions.difference
## 1                                                                 3.15503e-07
## 2                                                                -3.25321e-06
## 3                                                                 8.17526e-06
## 4                                                                -8.77382e-07
## 5                                                                -3.92696e-06
## 6                                                                -8.90563e-07
##   Kal.s.Z.test..fertile.vs.infertile.original.values...Proportions.fold.change
## 1                                                                     1.109513
## 2                                                                    -1.637039
## 3                                                                     1.742926
## 4                                                                    -2.298305
## 5                                                                    -1.570949
## 6                                                                    -1.323469
##   Kal.s.Z.test..fertile.vs.infertile.original.values...Test.statistic
## 1                                                          0.09361524
## 2                                                         -0.64968830
## 3                                                          1.08695529
## 4                                                         -0.43110877
## 5                                                         -0.68430632
## 6                                                         -0.25787956
##   Kal.s.Z.test..fertile.vs.infertile.original.values...P.value
## 1                                                    0.9254148
## 2                                                    0.5158936
## 3                                                    0.2770566
## 4                                                    0.6663893
## 5                                                    0.4937818
## 6                                                    0.7964999
##   Kal.s.Z.test..fertile.vs.infertile.original.values...FDR.p.value.correction
## 1                                                                           1
## 2                                                                           1
## 3                                                                           1
## 4                                                                           1
## 5                                                                           1
## 6                                                                           1
##   Kal.s.Z.test..fertile.vs.forager.original.values...Proportions.difference
## 1                                                              -1.33406e-06
## 2                                                              -1.59808e-06
## 3                                                               5.38735e-06
## 4                                                              -8.95135e-07
## 5                                                              -3.15090e-06
## 6                                                              -5.48197e-07
##   Kal.s.Z.test..fertile.vs.forager.original.values...Proportions.fold.change
## 1                                                                  -1.862404
## 2                                                                  -1.236336
## 3                                                                   1.489575
## 4                                                                  -2.360312
## 5                                                                  -1.411666
## 6                                                                  -1.177093
##   Kal.s.Z.test..fertile.vs.forager.original.values...Test.statistic
## 1                                                        -0.4589436
## 2                                                        -0.2976889
## 3                                                         0.7463464
## 4                                                        -0.4356180
## 5                                                        -0.5311333
## 6                                                        -0.1529787
##   Kal.s.Z.test..fertile.vs.forager.original.values...P.value
## 1                                                  0.6462747
## 2                                                  0.7659406
## 3                                                  0.4554582
## 4                                                  0.6631139
## 5                                                  0.5953264
## 6                                                  0.8784151
##   Kal.s.Z.test..fertile.vs.forager.original.values...FDR.p.value.correction
## 1                                                                         1
## 2                                                                         1
## 3                                                                         1
## 4                                                                         1
## 5                                                                         1
## 6                                                                         1
##   Kal.s.Z.test..infertile.vs.forager.original.values...Proportions.difference
## 1                                                                -1.64956e-06
## 2                                                                 1.65513e-06
## 3                                                                -2.78791e-06
## 4                                                                -1.77535e-08
## 5                                                                 7.76064e-07
## 6                                                                 3.42366e-07
##   Kal.s.Z.test..infertile.vs.forager.original.values...Proportions.fold.change
## 1                                                                    -2.066362
## 2                                                                     1.324105
## 3                                                                    -1.170083
## 4                                                                    -1.026979
## 5                                                                     1.112834
## 6                                                                     1.124354
##   Kal.s.Z.test..infertile.vs.forager.original.values...Test.statistic
## 1                                                          -0.5504647
## 2                                                           0.3508314
## 3                                                          -0.3406257
## 4                                                          -0.0112086
## 5                                                           0.1485381
## 6                                                           0.1032962
##   Kal.s.Z.test..infertile.vs.forager.original.values...P.value
## 1                                                    0.5820007
## 2                                                    0.7257148
## 3                                                    0.7333854
## 4                                                    0.9910570
## 5                                                    0.8819181
## 6                                                    0.9177279
##   Kal.s.Z.test..infertile.vs.forager.original.values...FDR.p.value.correction
## 1                                                                           1
## 2                                                                           1
## 3                                                                           1
## 4                                                                           1
## 5                                                                           1
## 6                                                                           1
##    X X.1 X.2 X.3 X.4 X.5 X.6 X.7 X.8 X.9 X.10 X.11 X.12 X.13 X.14 X.15
## 1 NA  NA  NA  NA  NA  NA  NA  NA  NA  NA   NA   NA   NA   NA   NA   NA
## 2 NA  NA  NA  NA  NA  NA  NA  NA  NA  NA   NA   NA   NA   NA   NA   NA
## 3 NA  NA  NA  NA  NA  NA  NA  NA  NA  NA   NA   NA   NA   NA   NA   NA
## 4 NA  NA  NA  NA  NA  NA  NA  NA  NA  NA   NA   NA   NA   NA   NA   NA
## 5 NA  NA  NA  NA  NA  NA  NA  NA  NA  NA   NA   NA   NA   NA   NA   NA
## 6 NA  NA  NA  NA  NA  NA  NA  NA  NA  NA   NA   NA   NA   NA   NA   NA
##   X.16 X.17 X.18 X.19 X.20 X.21 X.22 X.23 X.24 X.25 X.26 X.27 X.28
## 1   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 2   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 3   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 4   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 5   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 6   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA

The four sampled categories of female are called queen, fertile (worker), infertile (worker), and forager (worker). The columns with those names display the reads-per-mapped-kilobase normalized expression values for the gene in each row. The columns including the word “fold” are the fold change between two categories that were compared according to their expression values. The columns containing “p” are the false discovery rate-corrected p values from Z tests comparing the expression values. The typical cutoff for significance many people (and these authors) use for gene expression is p < 0.05 and fold change > 2. A spreadsheet like this is produced series command-line based bioinformatics packages such as Tuxedo, which estimate expression levels for genes based on the population of sequence fragments resulting from sequencing that map to the gene in question. Because fragments are typically shorter than genes, complex statistics are used to estimate gene copy number (expression level) based on the makeup of the fragment population.

Here I am renaming some of the columns for ease of use.

names(a)
##  [1] "Feature.ID"                                                                  
##  [2] "Annotations...RefSeq.protein.ID"                                             
##  [3] "Annotations...RefSeq"                                                        
##  [4] "queen...Q.RNA.Seq...Expression.values"                                       
##  [5] "queen...Q.RNA.Seq...Gene.length"                                             
##  [6] "queen...Q.RNA.Seq...Unique.gene.reads"                                       
##  [7] "queen...Q.RNA.Seq...Total.gene.reads"                                        
##  [8] "queen...Q.RNA.Seq...RPKM"                                                    
##  [9] "fertile...FB1.RNA.Seq...Expression.values"                                   
## [10] "fertile...FB1.RNA.Seq...Gene.length"                                         
## [11] "fertile...FB1.RNA.Seq...Unique.gene.reads"                                   
## [12] "fertile...FB1.RNA.Seq...Total.gene.reads"                                    
## [13] "fertile...FB1.RNA.Seq...RPKM"                                                
## [14] "infertile...IFB1.RNA.Seq...Expression.values"                                
## [15] "infertile...IFB1.RNA.Seq...Gene.length"                                      
## [16] "infertile...IFB1.RNA.Seq...Unique.gene.reads"                                
## [17] "infertile...IFB1.RNA.Seq...Total.gene.reads"                                 
## [18] "infertile...IFB1.RNA.Seq...RPKM"                                             
## [19] "forager...W2.RNA.Seq...Expression.values"                                    
## [20] "forager...W2.RNA.Seq...Gene.length"                                          
## [21] "forager...W2.RNA.Seq...Unique.gene.reads"                                    
## [22] "forager...W2.RNA.Seq...Total.gene.reads"                                     
## [23] "forager...W2.RNA.Seq...RPKM"                                                 
## [24] "Experiment...Range..original.values."                                        
## [25] "Experiment...IQR..original.values."                                          
## [26] "Experiment...Difference..original.values."                                   
## [27] "Experiment...Fold.Change..original.values."                                  
## [28] "Kal.s.Z.test..queen.vs.fertile.original.values...Proportions.difference"     
## [29] "Kal.s.Z.test..queen.vs.fertile.original.values...Proportions.fold.change"    
## [30] "Kal.s.Z.test..queen.vs.fertile.original.values...Test.statistic"             
## [31] "Kal.s.Z.test..queen.vs.fertile.original.values...P.value"                    
## [32] "Kal.s.Z.test..queen.vs.fertile.original.values...FDR.p.value.correction"     
## [33] "Kal.s.Z.test..queen.vs.infertile.original.values...Proportions.difference"   
## [34] "Kal.s.Z.test..queen.vs.infertile.original.values...Proportions.fold.change"  
## [35] "Kal.s.Z.test..queen.vs.infertile.original.values...Test.statistic"           
## [36] "Kal.s.Z.test..queen.vs.infertile.original.values...P.value"                  
## [37] "Kal.s.Z.test..queen.vs.infertile.original.values...FDR.p.value.correction"   
## [38] "Kal.s.Z.test..queen.vs.forager.original.values...Proportions.difference"     
## [39] "Kal.s.Z.test..queen.vs.forager.original.values...Proportions.fold.change"    
## [40] "Kal.s.Z.test..queen.vs.forager.original.values...Test.statistic"             
## [41] "Kal.s.Z.test..queen.vs.forager.original.values...P.value"                    
## [42] "Kal.s.Z.test..queen.vs.forager.original.values...FDR.p.value.correction"     
## [43] "Kal.s.Z.test..fertile.vs.infertile.original.values...Proportions.difference" 
## [44] "Kal.s.Z.test..fertile.vs.infertile.original.values...Proportions.fold.change"
## [45] "Kal.s.Z.test..fertile.vs.infertile.original.values...Test.statistic"         
## [46] "Kal.s.Z.test..fertile.vs.infertile.original.values...P.value"                
## [47] "Kal.s.Z.test..fertile.vs.infertile.original.values...FDR.p.value.correction" 
## [48] "Kal.s.Z.test..fertile.vs.forager.original.values...Proportions.difference"   
## [49] "Kal.s.Z.test..fertile.vs.forager.original.values...Proportions.fold.change"  
## [50] "Kal.s.Z.test..fertile.vs.forager.original.values...Test.statistic"           
## [51] "Kal.s.Z.test..fertile.vs.forager.original.values...P.value"                  
## [52] "Kal.s.Z.test..fertile.vs.forager.original.values...FDR.p.value.correction"   
## [53] "Kal.s.Z.test..infertile.vs.forager.original.values...Proportions.difference" 
## [54] "Kal.s.Z.test..infertile.vs.forager.original.values...Proportions.fold.change"
## [55] "Kal.s.Z.test..infertile.vs.forager.original.values...Test.statistic"         
## [56] "Kal.s.Z.test..infertile.vs.forager.original.values...P.value"                
## [57] "Kal.s.Z.test..infertile.vs.forager.original.values...FDR.p.value.correction" 
## [58] "X"                                                                           
## [59] "X.1"                                                                         
## [60] "X.2"                                                                         
## [61] "X.3"                                                                         
## [62] "X.4"                                                                         
## [63] "X.5"                                                                         
## [64] "X.6"                                                                         
## [65] "X.7"                                                                         
## [66] "X.8"                                                                         
## [67] "X.9"                                                                         
## [68] "X.10"                                                                        
## [69] "X.11"                                                                        
## [70] "X.12"                                                                        
## [71] "X.13"                                                                        
## [72] "X.14"                                                                        
## [73] "X.15"                                                                        
## [74] "X.16"                                                                        
## [75] "X.17"                                                                        
## [76] "X.18"                                                                        
## [77] "X.19"                                                                        
## [78] "X.20"                                                                        
## [79] "X.21"                                                                        
## [80] "X.22"                                                                        
## [81] "X.23"                                                                        
## [82] "X.24"                                                                        
## [83] "X.25"                                                                        
## [84] "X.26"                                                                        
## [85] "X.27"                                                                        
## [86] "X.28"
#the names are really long so I'm renaming the ones I need

colnames(a)[colnames(a)=="Feature.ID"] <- "ID"
colnames(a)[colnames(a)=="Annotations...RefSeq.protein.ID"] <- "annotation"
colnames(a)[colnames(a)=="queen...Q.RNA.Seq...RPKM"] <- "queen"
colnames(a)[colnames(a)=="fertile...FB1.RNA.Seq...RPKM"] <- "fertile"
colnames(a)[colnames(a)=="infertile...IFB1.RNA.Seq...RPKM"] <- "infertile"
colnames(a)[colnames(a)=="forager...W2.RNA.Seq...RPKM"] <- "forager"
colnames(a)[colnames(a)=="Kal.s.Z.test..queen.vs.fertile.original.values...FDR.p.value.correction"] <- "queenfertile_p"
colnames(a)[colnames(a)=="Kal.s.Z.test..queen.vs.fertile.original.values...Proportions.fold.change"] <- "queenfertile_fold"
colnames(a)[colnames(a)=="Kal.s.Z.test..queen.vs.infertile.original.values...Proportions.fold.change"] <- "queeninfertile_fold"
colnames(a)[colnames(a)=="Kal.s.Z.test..queen.vs.infertile.original.values...FDR.p.value.correction"] <- "queeninfertile_p"
colnames(a)[colnames(a)=="Kal.s.Z.test..queen.vs.forager.original.values...Proportions.fold.change"] <- "queenforager_fold"
colnames(a)[colnames(a)=="Kal.s.Z.test..queen.vs.forager.original.values...FDR.p.value.correction"] <- "queenforager_p"
colnames(a)[colnames(a)=="Kal.s.Z.test..fertile.vs.infertile.original.values...Proportions.fold.change"] <- "fertileinfertile_fold"
colnames(a)[colnames(a)=="Kal.s.Z.test..fertile.vs.infertile.original.values...FDR.p.value.correction"] <- "fertileinfertile_p"
colnames(a)[colnames(a)=="Kal.s.Z.test..fertile.vs.forager.original.values...Proportions.fold.change"] <- "fertileforager_fold"
colnames(a)[colnames(a)=="Kal.s.Z.test..fertile.vs.forager.original.values...FDR.p.value.correction"] <- "fertileforager_p"
colnames(a)[colnames(a)=="Kal.s.Z.test..infertile.vs.forager.original.values...Proportions.fold.change"] <- "infertileforager_fold"
colnames(a)[colnames(a)=="Kal.s.Z.test..infertile.vs.forager.original.values...FDR.p.value.correction"] <- "infertileforager_p"

names(a)
##  [1] "ID"                                                                         
##  [2] "annotation"                                                                 
##  [3] "Annotations...RefSeq"                                                       
##  [4] "queen...Q.RNA.Seq...Expression.values"                                      
##  [5] "queen...Q.RNA.Seq...Gene.length"                                            
##  [6] "queen...Q.RNA.Seq...Unique.gene.reads"                                      
##  [7] "queen...Q.RNA.Seq...Total.gene.reads"                                       
##  [8] "queen"                                                                      
##  [9] "fertile...FB1.RNA.Seq...Expression.values"                                  
## [10] "fertile...FB1.RNA.Seq...Gene.length"                                        
## [11] "fertile...FB1.RNA.Seq...Unique.gene.reads"                                  
## [12] "fertile...FB1.RNA.Seq...Total.gene.reads"                                   
## [13] "fertile"                                                                    
## [14] "infertile...IFB1.RNA.Seq...Expression.values"                               
## [15] "infertile...IFB1.RNA.Seq...Gene.length"                                     
## [16] "infertile...IFB1.RNA.Seq...Unique.gene.reads"                               
## [17] "infertile...IFB1.RNA.Seq...Total.gene.reads"                                
## [18] "infertile"                                                                  
## [19] "forager...W2.RNA.Seq...Expression.values"                                   
## [20] "forager...W2.RNA.Seq...Gene.length"                                         
## [21] "forager...W2.RNA.Seq...Unique.gene.reads"                                   
## [22] "forager...W2.RNA.Seq...Total.gene.reads"                                    
## [23] "forager"                                                                    
## [24] "Experiment...Range..original.values."                                       
## [25] "Experiment...IQR..original.values."                                         
## [26] "Experiment...Difference..original.values."                                  
## [27] "Experiment...Fold.Change..original.values."                                 
## [28] "Kal.s.Z.test..queen.vs.fertile.original.values...Proportions.difference"    
## [29] "queenfertile_fold"                                                          
## [30] "Kal.s.Z.test..queen.vs.fertile.original.values...Test.statistic"            
## [31] "Kal.s.Z.test..queen.vs.fertile.original.values...P.value"                   
## [32] "queenfertile_p"                                                             
## [33] "Kal.s.Z.test..queen.vs.infertile.original.values...Proportions.difference"  
## [34] "queeninfertile_fold"                                                        
## [35] "Kal.s.Z.test..queen.vs.infertile.original.values...Test.statistic"          
## [36] "Kal.s.Z.test..queen.vs.infertile.original.values...P.value"                 
## [37] "queeninfertile_p"                                                           
## [38] "Kal.s.Z.test..queen.vs.forager.original.values...Proportions.difference"    
## [39] "queenforager_fold"                                                          
## [40] "Kal.s.Z.test..queen.vs.forager.original.values...Test.statistic"            
## [41] "Kal.s.Z.test..queen.vs.forager.original.values...P.value"                   
## [42] "queenforager_p"                                                             
## [43] "Kal.s.Z.test..fertile.vs.infertile.original.values...Proportions.difference"
## [44] "fertileinfertile_fold"                                                      
## [45] "Kal.s.Z.test..fertile.vs.infertile.original.values...Test.statistic"        
## [46] "Kal.s.Z.test..fertile.vs.infertile.original.values...P.value"               
## [47] "fertileinfertile_p"                                                         
## [48] "Kal.s.Z.test..fertile.vs.forager.original.values...Proportions.difference"  
## [49] "fertileforager_fold"                                                        
## [50] "Kal.s.Z.test..fertile.vs.forager.original.values...Test.statistic"          
## [51] "Kal.s.Z.test..fertile.vs.forager.original.values...P.value"                 
## [52] "fertileforager_p"                                                           
## [53] "Kal.s.Z.test..infertile.vs.forager.original.values...Proportions.difference"
## [54] "infertileforager_fold"                                                      
## [55] "Kal.s.Z.test..infertile.vs.forager.original.values...Test.statistic"        
## [56] "Kal.s.Z.test..infertile.vs.forager.original.values...P.value"               
## [57] "infertileforager_p"                                                         
## [58] "X"                                                                          
## [59] "X.1"                                                                        
## [60] "X.2"                                                                        
## [61] "X.3"                                                                        
## [62] "X.4"                                                                        
## [63] "X.5"                                                                        
## [64] "X.6"                                                                        
## [65] "X.7"                                                                        
## [66] "X.8"                                                                        
## [67] "X.9"                                                                        
## [68] "X.10"                                                                       
## [69] "X.11"                                                                       
## [70] "X.12"                                                                       
## [71] "X.13"                                                                       
## [72] "X.14"                                                                       
## [73] "X.15"                                                                       
## [74] "X.16"                                                                       
## [75] "X.17"                                                                       
## [76] "X.18"                                                                       
## [77] "X.19"                                                                       
## [78] "X.20"                                                                       
## [79] "X.21"                                                                       
## [80] "X.22"                                                                       
## [81] "X.23"                                                                       
## [82] "X.24"                                                                       
## [83] "X.25"                                                                       
## [84] "X.26"                                                                       
## [85] "X.27"                                                                       
## [86] "X.28"

Figure 1a, NMDS plot

as published

In this NMDS plot, all the genes that are significantly differentially expressed (p < 0.05 and fold change > 2) are plotted in four dimensions based on the four expression values each gene has for the four categories sampled, in a manner that should minimize “stress”, or how hard it is for the compressed 2D plot to accurately depict the distances between the points. However, this NMDS particular plot does not result in finding a real minimum. The algorithm finds a somewhat low stress value to settle at but does not establish it as a minimum.

The purpose of this figure is to illustrate any distinctive clusters of genes that appear, indicating differences in gene expression patterns associated with each phenotype. The distance between the red labels, which are actually calculated as “species scores”, or weighted averages for all the expression values belonging to that category, illustrate roughly how different the sampled females are from each other under this mode of analysis.

#filtering for genes that have p<0.05 and fc>2 for at least one of the six different pairwise comparisons. These genes are referred to as "DEGs", differentially expressed genes 
b <- filter(a, queenfertile_p<0.05 & abs(queenfertile_fold)>2 | queeninfertile_p<0.05 & abs(queeninfertile_fold)>2 | queenforager_p<0.05 & abs(queenforager_fold)>2 | fertileinfertile_p<0.05 & abs(fertileinfertile_fold)>2 | fertileforager_p<0.05 & abs(fertileforager_fold)>2 | infertileforager_p<0.05 & abs(infertileforager_fold)>2)

#selecting just the expression values
s <- mutate(b) %>% select(queen, fertile, infertile, forager)

#performing nmds
#s.mds <- metaMDS(s)

#s.mds

#plotting using points
#plot(s.mds, type = "p")

#My NMDS plot is almost exactly the same as the published one, but mine is rotated in the opposite orientation

#plotting using text to show that the red labels match up to their positions in the published figure.
#plot(s.mds, type = "t")

When I filtered for genes that had p<0.05 and fc>2 for at least one of the six pairwise comparisons, I obtained an NMDS plot almost identical to the published version, except that it was rotated by a multiple of 90 degrees from the published orientation. This does not reflect any meaningful difference.

Figure 1b, Venn diagram

as published

This Venn diagram is supposed to show the subsets of diffentially expressed genes that were expressed in the four different categories and how these lists overlap.

The vegan package I (and the authors) used for the diagram outputs to a file, not directly to R, so I am embedding the resulting image.

# SECOND CLOSEST
q_nonzero <- filter(a, queen != 0 & queenfertile_p<0.05 & abs(queenfertile_fold)>2 & queeninfertile_p<0.05 & abs(queeninfertile_fold)>2 & queenforager_p<0.05 & abs(queenforager_fold)>2)
q_nonzero <- mutate(q_nonzero) %>% select(ID)

fert_nonzero <- filter(a, fertile != 0 & queenfertile_p<0.05 & abs(queenfertile_fold)>2 & fertileinfertile_p<0.05 & abs(fertileinfertile_fold)>2 & fertileforager_p<0.05 & abs(fertileforager_fold)>2)
fert_nonzero <- mutate(fert_nonzero) %>% select(ID)

infert_nonzero <- filter(a, infertile != 0 & fertileinfertile_p<0.05 & abs(fertileinfertile_fold)>2 & queeninfertile_p<0.05 & abs(queeninfertile_fold)>2 & infertileforager_p<0.05 & abs(infertileforager_fold)>2)
infert_nonzero <- mutate(infert_nonzero) %>% select(ID)

for_nonzero <- filter(a, forager != 0 & infertileforager_p<0.05 & abs(infertileforager_fold)>2 & fertileforager_p<0.05 & abs(fertileforager_fold)>2 & queenforager_p<0.05 & abs(queenforager_fold)>2)
for_nonzero <- mutate(for_nonzero) %>% select(ID)

####
#CLOSEST
q_nonzero <- filter(a, queen != 0 & queenfertile_p<0.05 & queeninfertile_p<0.05 & queenforager_p<0.05)
q_nonzero <- mutate(q_nonzero) %>% select(ID)

fert_nonzero <- filter(a, fertile != 0 & queenfertile_p<0.05 & fertileinfertile_p<0.05 & fertileforager_p<0.05)
fert_nonzero <- mutate(fert_nonzero) %>% select(ID)

infert_nonzero <- filter(a, infertile != 0 & fertileinfertile_p<0.05 & queeninfertile_p<0.05 & infertileforager_p<0.05)
infert_nonzero <- mutate(infert_nonzero) %>% select(ID)

for_nonzero <- filter(a, forager != 0 & infertileforager_p<0.05 & fertileforager_p<0.05 & queenforager_p<0.05)
for_nonzero <- mutate(for_nonzero) %>% select(ID)


venn.diagram(list(Queen=q_nonzero$ID, Forager=for_nonzero$ID, Fertile=fert_nonzero$ID, Infertile=infert_nonzero$ID), "replication_venn.tiff", fill = c("blue", "red", "yellow", "green"), cex=3,cat.cex=1.8) 
## [1] 1

I did not replicate figure 2 because the vast majority of the process behind that graph was not done in R. It needs to be done using BLASTx rather than R. Unfortunately the authors did not publish the raw results of the BLAST they did.

Figure 3a, pairwise pie

as published

This pie should show the subsets of differentially expressed genes that are statistically different based on p value and fold change between the different pairwise comparisons that can be made.

qfert <- filter(a, queenfertile_p<0.05 & abs(queenfertile_fold)>2)
qfert <- mutate(qfert) %>% select(ID)

qinfert <- filter(a, queeninfertile_p<0.05 & abs(queeninfertile_fold)>2)
qinfert <- mutate(qinfert) %>% select(ID)

qfor <- filter(a, queenforager_p<0.05 & abs(queenforager_fold)>2)
qfor <- mutate(qfor) %>% select(ID)

fertinfert <- filter(a, fertileinfertile_p<0.05 & abs(fertileinfertile_fold)>2)
fertinfert <- mutate(fertinfert) %>% select(ID)

fertfor <- filter(a, fertileforager_p<0.05 & abs(fertileforager_fold)>2)
fertfor <- mutate(fertfor) %>% select(ID)

infertfor <- filter(a, infertileforager_p<0.05 & abs(infertileforager_fold)>2)
infertfor <- mutate(infertfor) %>% select(ID)

slices <- c(nrow(qfert), nrow(qinfert), nrow(qfor), nrow(fertinfert), nrow(fertfor), nrow(infertfor)) 
lbls <- c("q vs. fer", "q vs. infer", "q vs. for", "fer vs. inf", "fer vs. for", "inf vs. for")
lbls <- paste(lbls, slices) # add percents to labels 
pie(slices,labels = lbls, col=rainbow(length(lbls)),
    main="Pie Chart of Pairwise Differences")

Figure 3b, up-regulated pie

as published

This pie should show the subsets of differentially expressed genes that are uniquely upregulated in one category relative to all the others. These would suggest that these genes are fundementally important for creating the unique phenotype in question, probably being necessarily for developping certain structures needed for its behavior and role.

#b <- filter(a, queenfertile_p<0.05 & abs(queenfertile_fold)>2 | queeninfertile_p<0.05 & abs(queeninfertile_fold)>2 | queenforager_p<0.05 & abs(queenforager_fold)>2 | fertileinfertile_p<0.05 & abs(fertileinfertile_fold)>2 | fertileforager_p<0.05 & abs(fertileforager_fold)>2 | infertileforager_p<0.05 & abs(infertileforager_fold)>2)

#q_up <- filter(b, (queen > forager) & (queen > fertile) & (queen > infertile))

#fert_up <- filter(b, (fertile > forager) & (fertile > queen) & (fertile > infertile))

#infert_up <- filter(b, (infertile > forager) & (infertile > queen) & (infertile > fertile))

#for_up <- filter(b, (forager > fertile) & (forager > queen) & (forager > infertile))

###

#q_up <- filter(a, queenfertile_p<0.05 & queenfertile_fold>0 & queeninfertile_p<0.05 & queeninfertile_fold>0 & queenforager_p<0.05 & queenforager_fold>0)

#fert_up <- filter(a, queenfertile_p<0.05 & queenfertile_fold < 0 & fertileinfertile_p<0.05 & fertileinfertile_fold>0 & fertileforager_p<0.05 & fertileforager_fold>0)

#infert_up <- filter(a, fertileinfertile_p<0.05 & fertileinfertile_fold < 0 & queeninfertile_p<0.05 & queeninfertile_fold < 0 & infertileforager_p<0.05 & infertileforager_fold>0)

#for_up <- filter(a, infertileforager_p<0.05 & infertileforager_fold < 0 & fertileforager_p<0.05 & fertileforager_fold < 0 & queenforager_p<0.05 & queenforager_fold < 0)

###

q_up <- filter(a, queenfertile_p<0.05 & queenfertile_fold>2 & queeninfertile_p<0.05 & queeninfertile_fold>2 & queenforager_p<0.05 & queenforager_fold>2)

fert_up <- filter(a, queenfertile_p<0.05 & queenfertile_fold < -2 & fertileinfertile_p<0.05 & fertileinfertile_fold>2 & fertileforager_p<0.05 & fertileforager_fold>2)

infert_up <- filter(a, fertileinfertile_p<0.05 & fertileinfertile_fold < -2 & queeninfertile_p<0.05 & queeninfertile_fold < -2 & infertileforager_p<0.05 & infertileforager_fold>2)

for_up <- filter(a, infertileforager_p<0.05 & infertileforager_fold < -2 & fertileforager_p<0.05 & fertileforager_fold < -2 & queenforager_p<0.05 & queenforager_fold < -2)

slices <- c(nrow(q_up), nrow(infert_up), nrow(for_up), nrow(fert_up)) 
lbls <- c("q", "inf", "for", "fer")
lbls <- paste(lbls, slices) # add percents to labels 
pie(slices,labels = lbls, col=rainbow(length(lbls)),
    main="Pie Chart of Up-Regulated Genes")

Table 1, up-regulation table

as published

This table shows “Number of significantly up-regulated genes (FDR-P < 0.05; fold change >2) in the focal caste in comparison withthe other castes and the number of pair-specific genes (genes up-regulated in only this specific comparison) with the corresponding percentage in parentheses.”

Unlike the previous depictions of numbers of differentially expressed genes between phenotypes, this table illustrates both “directions” of the comparison to differentiate between genes upregulated in one of the two compared phenotypes vs. those upregulated in the other.

# I am filtering for p<0.05 and fc>2 for each pairwise comparison in both "directions", then editing the set down to only contain the column list of gene IDs.
qfert <- filter(a, queenfertile_p<0.05 & queenfertile_fold>2)
qfert <- mutate(qfert) %>% select(ID)

fertq <- filter(a, queenfertile_p<0.05 & queenfertile_fold < -2)
fertq<- mutate(fertq) %>% select(ID)

qinfert <- filter(a, queeninfertile_p<0.05 & queeninfertile_fold>2)
qinfert <- mutate(qinfert) %>% select(ID)

infertq <- filter(a, queeninfertile_p<0.05 & queeninfertile_fold < -2)
infertq <- mutate(infertq) %>% select(ID)

qfor <- filter(a, queenforager_p<0.05 & queenforager_fold>2)
qfor <- mutate(qfor) %>% select(ID)

forq <- filter(a, queenforager_p<0.05 & queenforager_fold < -2)
forq <- mutate(forq) %>% select(ID)

fertinfert <- filter(a, fertileinfertile_p<0.05 & fertileinfertile_fold>2)
fertinfert <- mutate(fertinfert) %>% select(ID)

infertfert <- filter(a, fertileinfertile_p<0.05 & fertileinfertile_fold < -2)
infertfert <- mutate(infertfert) %>% select(ID)

fertfor <- filter(a, fertileforager_p<0.05 & fertileforager_fold>2)
fertfor <- mutate(fertfor) %>% select(ID)

forfert <- filter(a, fertileforager_p<0.05 & fertileforager_fold < -2)
forfert <- mutate(forfert) %>% select(ID)

infertfor <- filter(a, infertileforager_p<0.05 & infertileforager_fold>2)
infertfor <- mutate(infertfor) %>% select(ID)

forinfert <- filter(a, infertileforager_p<0.05 & infertileforager_fold < -2)
forinfert <- mutate(forinfert) %>% select(ID)

#Now I am filtering for the subsets of these gene lists that only appear within that selected comparison and not in any of the others, using dplyr's setdiff function.
q_fert <- dplyr::setdiff(qfert, union(fertq, qinfert, infertq, qfor, forq, fertinfert, infertfert, fertfor, forfert, infertfor, forinfert))
fert_q <- dplyr::setdiff(fertq, union(qfert, qinfert, infertq, qfor, forq, fertinfert, infertfert, fertfor, forfert, infertfor, forinfert))
q_infert <- dplyr::setdiff(qinfert, union(qfert, fertq, infertq, qfor, forq, fertinfert, infertfert, fertfor, forfert, infertfor, forinfert))
infert_q <- dplyr::setdiff(infertq, union(qfert, fertq, qinfert, qfor, forq, fertinfert, infertfert, fertfor, forfert, infertfor, forinfert))
q_for <- dplyr::setdiff(qfor, union(qfert, fertq, qinfert, infertq, forq, fertinfert, infertfert, fertfor, forfert, infertfor, forinfert))
for_q <- dplyr::setdiff(forq, union(qfert, fertq, qinfert, infertq, qfor, fertinfert, infertfert, fertfor, forfert, infertfor, forinfert))
fert_infert <- dplyr::setdiff(fertinfert, union(qfert, fertq, qinfert, infertq, qfor, forq, infertfert, fertfor, forfert, infertfor, forinfert))
infert_fert <- dplyr::setdiff(infertfert, union(qfert, fertq, qinfert, infertq, qfor, forq, fertinfert, fertfor, forfert, infertfor, forinfert))
fert_for <- dplyr::setdiff(fertfor, union(qfert, fertq, qinfert, infertq, qfor, forq, fertinfert, infertfert, forfert, infertfor, forinfert))
for_fert <- dplyr::setdiff(forfert, union(qfert, fertq, qinfert, infertq, qfor, forq, fertinfert, infertfert, fertfor, infertfor, forinfert))
infert_for <- dplyr::setdiff(infertfor, union(qfert, fertq, qinfert, infertq, qfor, forq, fertinfert, infertfert, fertfor, forfert, forinfert))
for_infert <- dplyr::setdiff(forinfert, union(qfert, fertq, qinfert, infertq, qfor, forq, fertinfert, infertfert, fertfor, forfert, infertfor))

#This will be the row in the table showing the total genes that filtered as signficant for each pairwise comparison
#Simultaneously, I am reording the gene sets to match the order in the published table
Total <- c(nrow(qfert), nrow(qinfert), nrow(qfor), nrow(fertq), nrow(fertinfert), nrow(fertfor), nrow(infertfert), nrow(infertq), nrow(infertfor),nrow(forq), nrow(forfert), nrow(forinfert))

#This will be the row in the table showing just the genes uniquely filtering into each pairwise comparison
Pair_specific <- c(nrow(q_fert), nrow(q_infert), nrow(q_for), nrow(fert_q), nrow(fert_infert), nrow(fert_for), nrow(infert_fert), nrow(infert_q), nrow(infert_for),nrow(for_q), nrow(for_fert), nrow(for_infert))

#This will be the the row showing what percent of each gene set is unique to that comparison by dividing one vector by the other
Pair_specific_percent <- Pair_specific/Total

#These vectors match the labels used in the published table
Focal_caste <- c("Queen", "", "", "Fertile worker", "", "", "Infertile worker", "", "", "Forager", "", "")
Comparison <- c("fer", "inf", "fer", "q", "inf", "for", "q", "fer", "for", "q", "fer", "inf")

table1 <- data.frame(Focal_caste, Comparison, Total, Pair_specific, Pair_specific_percent)

#kable function produces a pretty table
knitr::kable(table1)
Focal_caste Comparison Total Pair_specific Pair_specific_percent
Queen fer 1055 566 0.5364929
inf 876 376 0.4292237
fer 968 462 0.4772727
Fertile worker q 1728 1717 0.9936343
inf 273 193 0.7069597
for 236 170 0.7203390
Infertile worker q 306 118 0.3856209
fer 2043 667 0.3264807
for 194 127 0.6546392
Forager q 1753 416 0.2373075
fer 221 57 0.2579186
inf 144 77 0.5347222

Figure 4, vitellogenin bar graph

as published

Here the authors focus in on a small group of differentially expressed genes since they relate heavily to female ant phenotype. Vitellogenenin is needed for producing eggs, so it is very relevant to reproductive status. This graph shows expression of different copies of the vitellogenin protein and the vitellogenin receptor.

I had to pull out the individual genes in question using their gene id as opposed to filtering for them in R using a keyword from their description. This was necessary because not all of these proteins can be found through text search for “vg” plus wildcards and not all can be found through text search for “vit” plus wildcards. Additionally, the authors did not use some of the genes whose descriptions contain these terms since they are just precursors to the proteins. There is not way to text filter by gene name or description for this set. You can only filter them using their specific IDs.

I melted my existing data frame to fit it into the format required for a ggplot grouped bar graph.

#selecting the vitellogenin genes that the authors focused on
s <- filter(a, ID=="TlongiContigs_rep_c8852" | ID=="TlongiContigs_rep_c14651" | ID=="TlongiContigs_rep_c6184" | ID=="TlongiContigs_rep_c39314" | ID=="TlongiContigs_rep_c43820")

#reordering to fit paper
levels(s$ID) <- c("TlongiContigs_rep_c8852", "TlongiContigs_rep_c14651", "TlongiContigs_rep_c6184", "TlongiContigs_rep_c39314", "TlongiContigs_rep_c43820")

#the published figure contains significance groupings denoted by letters, so we should check the pairwise comparison p values to see whether the same groupings appear

#for each gene I am extracting the p values to compare them
Vg2 <- filter(s, ID=="TlongiContigs_rep_c8852") %>% select(ID, queenfertile_p, queeninfertile_p, queenforager_p, fertileinfertile_p, fertileforager_p, infertileforager_p)
Vg2
##                        ID queenfertile_p queeninfertile_p queenforager_p
## 1 TlongiContigs_rep_c8852    4.33584e-13      3.09115e-12              0
##   fertileinfertile_p fertileforager_p infertileforager_p
## 1        2.08868e-06      9.88682e-07        6.00246e-13
#every expression value is significantly different from the others

Vg3 <- filter(s, ID=="TlongiContigs_rep_c14651") %>% select(ID, queenfertile_p, queeninfertile_p, queenforager_p, fertileinfertile_p, fertileforager_p, infertileforager_p)
Vg3
##                         ID queenfertile_p queeninfertile_p queenforager_p
## 1 TlongiContigs_rep_c14651    2.97754e-12      2.53228e-12    5.25948e-12
##   fertileinfertile_p fertileforager_p infertileforager_p
## 1          0.2228907      3.90965e-07        6.52258e-13
#fertile and infertile expression values are not significantly different from each other; all others are significantly different

VgRec <- filter(s, ID=="TlongiContigs_rep_c6184") %>% select(ID, queenfertile_p, queeninfertile_p, queenforager_p, fertileinfertile_p, fertileforager_p, infertileforager_p)
VgRec
##                        ID queenfertile_p queeninfertile_p queenforager_p
## 1 TlongiContigs_rep_c6184    7.27345e-13                0              0
##   fertileinfertile_p fertileforager_p infertileforager_p
## 1                  1        0.1786372                  1
#queen expression value is signficantly different from all others; others are not signficantly different from each other

Vg1 <- filter(s, ID=="TlongiContigs_rep_c39314") %>% select(ID, queenfertile_p, queeninfertile_p, queenforager_p, fertileinfertile_p, fertileforager_p, infertileforager_p)
Vg1
##                         ID queenfertile_p queeninfertile_p queenforager_p
## 1 TlongiContigs_rep_c39314     0.02210215      4.30138e-13    2.79332e-12
##   fertileinfertile_p fertileforager_p infertileforager_p
## 1        1.08503e-05      0.000242488                  1
#infertile and forager expression values are not significantly different from each other; all others are signficantly different

Vg6 <- filter(s, ID=="TlongiContigs_rep_c43820") %>% select(ID, queenfertile_p, queeninfertile_p, queenforager_p, fertileinfertile_p, fertileforager_p, infertileforager_p)
Vg6
##                         ID queenfertile_p queeninfertile_p queenforager_p
## 1 TlongiContigs_rep_c43820    0.001709597        0.8957542    1.71581e-08
##   fertileinfertile_p fertileforager_p infertileforager_p
## 1         3.0524e-05      1.71499e-13        6.08174e-06
#queen and infertile values are not significantly different from each other; all others are significantly different

#selecting just the lists of gene IDs
s <- mutate(s) %>% select(ID, queen, fertile, infertile, forager)

#reformatting
new_melt <- melt(s, id.vars='ID')

#gene expression is usually plotting by either log transforming the values or using a log scale, which is what the authors did. This is because values can vary by many orders of magnitude. 
g <- ggplot(new_melt, aes(ID, value)) + geom_bar(aes(fill = variable), 
   width = 0.4, position = position_dodge(width=0.5), stat="identity") + 
scale_x_discrete(limits=c("TlongiContigs_rep_c8852", "TlongiContigs_rep_c14651", "TlongiContigs_rep_c6184", "TlongiContigs_rep_c39314", "TlongiContigs_rep_c43820"), labels=c("Vg2", "Vg3", "VgRec", "Vg1", "Vg6"), name="") + scale_y_log10(name="log RPKM") 
#adding in significance groupings based on analysis of p values
g <- g + annotate("text",x=0.81,y=1550,label="a")+
  annotate("text",x=0.94,y=88,label="c")+
  annotate("text",x=1.06,y=200,label="b")+
  annotate("text",x=1.19,y=19,label="d")
g <- g + annotate("text",x=1.81,y=1167,label="a")+
  annotate("text",x=1.94,y=75,label="b")+
  annotate("text",x=2.06,y=123,label="b")+
  annotate("text",x=2.19,y=11,label="c")
g <- g + annotate("text",x=2.81,y=127,label="a")+
  annotate("text",x=2.94,y=21,label="b")+
  annotate("text",x=3.06,y=13,label="b")+
  annotate("text",x=3.19,y=4.4,label="b")
g <- g + annotate("text",x=3.81,y=104,label="c")+
  annotate("text",x=3.94,y=177,label="b")+
  annotate("text",x=4.06,y=320,label="a")+
  annotate("text",x=4.19,y=290,label="a")
g <- g + annotate("text",x=4.81,y=74,label="b")+
  annotate("text",x=4.94,y=157,label="a")+
  annotate("text",x=5.06,y=70,label="b")+
  annotate("text",x=5.19,y=12,label="c")
g  

Despite the many differences in numbers of genes in my attempts to replicate some of the figures, this figure perfectly matches the one in the paper. Over all, ambiguity in the language used to described what counted as differentially expressed genes for the purposes of different figure probably caused the differences; the exact similarity between the publihed and replicated versions of this figure (4) and the NMDS plot in figure 1 suggest that there was no “foul play” in the data analysis but that instead this paper suffers from a lack of detail in describing its methods.